{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas\n",
    "import os.path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def getData(fromCache=True):\n",
    "    dataDirectory = '../../data/census/Census_2010/'\n",
    "    outputName = 'census2010_clean.csv'\n",
    "    \n",
    "    if fromCache & os.path.isfile(dataDirectory + outputName):\n",
    "        return pandas.read_csv(dataDirectory + outputName, header=0)\n",
    "    \n",
    "    else:\n",
    "        # Retrieved 10-07-2016 from http://factfinder.census.gov/bkmk/table/1.0/en/DEC/10_SF1/P1/0500000US04007.15000|0500000US04013.15000|0500000US04021.15000|0500000US04025.15000\n",
    "        totalPopulation = pandas.read_csv(dataDirectory + 'total_population/DEC_10_SF1_P1_with_ann.csv', \n",
    "                                          header=1)\n",
    "        # Retrieved 10-07-2016 from http://factfinder.census.gov/bkmk/table/1.0/en/DEC/10_SF1/H11/0500000US04007.15000|0500000US04013.15000|0500000US04021.15000|0500000US04025.15000\n",
    "        occupiedHousingTenure = pandas.read_csv(dataDirectory + 'occupied_housing_tenure/DEC_10_SF1_H11_with_ann.csv',  \n",
    "                                                header=1)\n",
    "        # Retrieved 10-07-2016 from http://factfinder.census.gov/bkmk/table/1.0/en/DEC/10_SF1/P13/0500000US04007.15000|0500000US04013.15000|0500000US04021.15000|0500000US04025.15000\n",
    "        medianAgeSex = pandas.read_csv(dataDirectory + 'median_age_sex/DEC_10_SF1_P13_with_ann.csv',  \n",
    "                                       header=1)\n",
    "        # Retrieved 10-07-2016 from http://factfinder.census.gov/bkmk/table/1.0/en/DEC/10_SF1/QTP11/0500000US04007.15000|0500000US04013.15000|0500000US04021.15000|0500000US04025.15000\n",
    "        householdsFamilies = pandas.read_csv(dataDirectory + 'households_families/DEC_10_SF1_QTP11_with_ann.csv',  \n",
    "                                             header=1)\n",
    "        # Retrieved 10-07-2016 from http://factfinder.census.gov/bkmk/table/1.0/en/DEC/10_SF1/QTP11/0500000US04007.15000|0500000US04013.15000|0500000US04021.15000|0500000US04025.15000\n",
    "        race = pandas.read_csv(dataDirectory + 'hispanic_latino/DEC_10_SF1_P11_with_ann.csv',  \n",
    "                               header=1)\n",
    "\n",
    "        selectedData = (pandas.DataFrame(totalPopulation[['Id2','Total']])\n",
    "                        .merge(occupiedHousingTenure[['Id2',\n",
    "                                                     'Owned with a mortgage or a loan',\n",
    "                                                     'Owned free and clear',\n",
    "                                                     'Renter occupied'\n",
    "                                                    ]], \n",
    "                               on='Id2', how='outer'\n",
    "                             )\n",
    "                        .merge(medianAgeSex[['Id2',\n",
    "                                             'Median age -- - Both sexes'\n",
    "                                           ]], \n",
    "                              on='Id2', how='outer'\n",
    "                             )\n",
    "                        .merge(householdsFamilies[['Id2',\n",
    "                                                  'Number; HOUSEHOLD TYPE - Total households',\n",
    "                                                  'Number; HOUSEHOLD TYPE - Total households - Family households [1]',\n",
    "                                                  'Number; HOUSEHOLD SIZE - Total households - 1-person household',\n",
    "                                                  'Number; HOUSEHOLD SIZE - Total households - Average household size'\n",
    "                                                 ]], \n",
    "                               on='Id2', how='outer'\n",
    "                             )\n",
    "                        .merge(race[['Id2',\n",
    "                                    'Hispanic or Latino',\n",
    "                                    'Not Hispanic or Latino: - Population of one race: - White alone',\n",
    "                                    'Not Hispanic or Latino: - Population of one race: - Black or African American alone',\n",
    "                                    'Not Hispanic or Latino: - Population of one race: - American Indian and Alaska Native alone',\n",
    "                                    'Not Hispanic or Latino: - Population of one race: - Asian alone'    \n",
    "                                   ]], \n",
    "                               on='Id2', how='outer'\n",
    "                             )\n",
    "                        .rename(columns={'Id2': 'GEOID',\n",
    "                                         'Total': 'population_total',\n",
    "                                         'Owned with a mortgage or a loan': 'home_mortgages',\n",
    "                                         'Owned free and clear': 'home_owners',\n",
    "                                         'Renter occupied': 'renters',\n",
    "                                         'Median age -- - Both sexes': 'median_age',\n",
    "                                         'Number; HOUSEHOLD TYPE - Total households': 'total_households',\n",
    "                                         'Number; HOUSEHOLD TYPE - Total households - Family households [1]': \n",
    "                                             'family_households',\n",
    "                                         'Number; HOUSEHOLD SIZE - Total households - 1-person household': \n",
    "                                             'single_households',\n",
    "                                         'Number; HOUSEHOLD SIZE - Total households - Average household size': \n",
    "                                             'average_household_size',\n",
    "                                         'Hispanic or Latino': 'population_hispanic_latino',\n",
    "                                         'Not Hispanic or Latino: - Population of one race: - White alone': \n",
    "                                             'population_white',\n",
    "                                         'Not Hispanic or Latino: - Population of one race: - Black or African American alone': \n",
    "                                             'population_black',\n",
    "                                         'Not Hispanic or Latino: - Population of one race: - American Indian and Alaska Native alone': \n",
    "                                             'population_native_american',\n",
    "                                         'Not Hispanic or Latino: - Population of one race: - Asian alone': \n",
    "                                             'population_asian'\n",
    "                                        }\n",
    "                               )\n",
    "                        )\n",
    "    \n",
    "        selectedData.to_csv(dataDirectory + outputName, index=False)\n",
    "        return selectedData"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
